library(tidyverse)
library(pivottabler)
library(plotly)
library(psych)
# Print large numbers in full instead of scientific notation.
options(scipen = 999)

# Load the movies data set.
# The former install.packages("ggplot2::") call was dropped: "ggplot2::" is not
# a package name, and ggplot2 is already attached by library(tidyverse).
# NOTE(review): the path is machine-specific; adjust it when running elsewhere.
# NOTE(review): later code indexes a colour vector by mov$genre, which only
# works if genre is a factor; read.csv() returns character columns on
# R >= 4.0 -- confirm which R version this script targets.
mov <- read.csv("~/project-ionic/R/week4/movies.csv")

# Quick inspection: first rows, column names, per-column summary, structure.
head(mov)
names(mov)
summary(mov)
str(mov)
# Number of movies per release year.
year_num <- mov %>%
  group_by(year) %>%
  summarize(num = n())

# Line chart of the yearly movie count.
# The trace name used to be the placeholder 'Tree 1'.
p <- plot_ly(
  year_num,
  x = ~year, y = ~num,
  name = "Movies per year",
  type = "scatter", mode = "lines"
)
p

# 3D scatter of genre x rating x year, with marker size driven by year and
# colour by genre. The data argument is now `mov` (it used to be `iris`, an
# unrelated data set) and the columns are mapped with plotly's formula
# interface instead of being looked up through with().
plot_ly(
  mov,
  x = ~genre, y = ~rating, z = ~year,
  size = ~year, color = ~genre,
  type = "scatter3d", mode = "markers"
)
# Palette used to colour points by genre (17 entries).
my_cols <- c(
  "purple", "violetred1", "green3", "red", "cyan", "steelblue", "white",
  "orange", "pink", "gray", "black", "maroon",
  "coral", "forestgreen", "darkblue", "lavender", "ivory"
)

# Scatterplot matrix of the first five columns, coloured by genre.
# - The palette is indexed by the integer code of the genre factor. Indexing
#   by a character vector would look up (non-existent) names and give NA
#   colours, which is what happens when read.csv() returns strings.
# - data.matrix() turns factor/character columns into integer codes so that
#   pairs() accepts them; numeric columns pass through unchanged.
# NOTE(review): assumes mov has at most 17 distinct genres -- verify.
pairs(
  data.matrix(mov[, 1:5]),
  pch = 19, cex = 0.5,
  col = my_cols[as.integer(as.factor(mov$genre))],
  lower.panel = NULL
)

library(RColorBrewer)

# Stacked bars of the runtime x score contingency table; the four "Set1"
# colours are recycled across the runtime rows.
barplot(table(mov$runtime, mov$score), col = brewer.pal(4, "Set1"))
# Number of movies per genre.
genrePie <- mov %>%
  group_by(genre) %>%
  summarize(num = n())
genrePie

# Percentage share of each genre. Counts and labels are taken from genrePie
# rather than typed in by hand, so they cannot drift from the loaded data and
# the labels no longer carry stray trailing spaces.
# The variable names data/pct/labels/col are kept as they were in case code
# further down the file refers to them.
data <- genrePie$num
pct <- round((data / sum(data)) * 100, 2)
labels <- paste(genrePie$genre, pct, "%")
col <- c(
  "purple", "violetred1", "green3", "red", "cyan", "steelblue", "white",
  "orange", "pink", "gray", "black", "maroon",
  "coral", "forestgreen", "darkblue", "lavender", "ivory"
)

# Pie chart of the genre shares, drawn clockwise starting at 180 degrees.
pie(
  pct,
  col = col, radius = 0.8, init.angle = 180, clockwise = TRUE,
  labels = labels, main = "Genre Percentage"
)

# Bar chart of the same counts. The trace name used to be the placeholder
# 'Sepal.Width'.
plot_ly(genrePie, x = ~genre, y = ~num, type = "bar", name = "Count") %>%
  layout(yaxis = list(title = "Count"), barmode = "stack")
# --- Score ---------------------------------------------------------------
# Ten highest-scored movies and the overall mean score.
head(mov[order(mov$score, decreasing = TRUE), ], 10)
mean(mov$score)

# Budget against score. The plot type is "p" (points); the previous value
# "point" is not a valid type and was only accepted after being truncated to
# its first character with a warning.
plot(
  mov$score, mov$budget,
  type = "p", pch = 21, bg = "yellow",
  xlab = "Score", ylab = "Budget",
  main = "score and budget"
)

# --- Votes ---------------------------------------------------------------
# Ten most-voted movies and the mean vote count. The column is spelled out
# as `votes`; `mov$vote` only worked through partial matching of `$`.
head(mov[order(mov$votes, decreasing = TRUE), ], 10)
mean(mov$votes)

# Votes against score as red points joined by a red line (replaces the
# deprecated qplot() call with the equivalent ggplot() layers).
ggplot(mov, aes(x = score, y = votes)) +
  geom_point(colour = "red") +
  geom_line(colour = "red")

# --- Gross ---------------------------------------------------------------
# Ten highest-grossing movies and the mean gross.
head(mov[order(mov$gross, decreasing = TRUE), ], 10)
mean(mov$gross)

# Gross against score as a blue line.
ggplot(mov, aes(x = score, y = gross)) +
  geom_line(colour = "blue")
# Movies directed by Steven Spielberg. The column is referenced directly
# inside filter() instead of through mov$director.
Ste <- mov %>%
  filter(director == "Steven Spielberg")

# His ten highest-scored movies.
head(Ste[order(Ste$score, decreasing = TRUE), ], 10)

# Pairwise panels (Pearson correlations, histograms with density curves,
# correlation ellipses) for every column except the fifth.
pairs.panels(
  Ste[, -5],
  method = "pearson",   # correlation method
  hist.col = "#00AFBB",
  density = TRUE,       # show density plots
  ellipses = TRUE       # show correlation ellipses
)

# Pivot table: count of movies by score (rows) and genre (columns).
qhpvt(Ste, "score", "genre", "n()")

# Score per genre as points joined by lines (replaces the deprecated
# qplot() call with the equivalent ggplot() layers).
ggplot(Ste, aes(x = genre, y = score)) +
  geom_point() +
  geom_line()

# Pivot table: count of movies by company (rows) and genre (columns).
qhpvt(Ste, "company", "genre", "n()")
# DreamWorks: mean score, then score by year coloured by genre, first as a
# static ggplot and then as an interactive plotly chart.
DreamW <- filter(mov, company == "DreamWorks")
mean(DreamW$score)
ggplot(data = DreamW, mapping = aes(x = year, y = score, color = genre)) +
  geom_point(alpha = 0.5) +
  coord_fixed() +
  labs(x = "Year", y = "Score", title = "DreamWorks")
plot_ly(DreamW, x = ~year, y = ~score, color = ~genre)

# Universal Pictures: the same two charts.
universalP <- filter(mov, company == "Universal Pictures")
ggplot(data = universalP, mapping = aes(x = year, y = score, color = genre)) +
  geom_point(alpha = 0.5) +
  coord_fixed() +
  labs(x = "Year", y = "Score", title = "Universal Pictures")
plot_ly(universalP, x = ~year, y = ~score, color = ~genre)
# Pivot table: Spielberg movie counts by country (rows) and genre (columns).
qhpvt(Ste, rows = "country", columns = "genre", calculations = "n()")

# Pivot table: movie counts by runtime (rows) and score (columns), all movies.
qhpvt(mov, rows = "runtime", columns = "score", calculations = "n()")

# 2D density contours of runtime against score for the Spielberg movies.
ggplot(data = Ste, mapping = aes(x = score, y = runtime, colour = score)) +
  stat_density_2d()
# Ten highest-scored movies overall.
head(mov[order(mov$score, decreasing = TRUE), ], 10)

# Progressive narrowing of the data set. Thresholds are numeric literals:
# comparing a numeric column with a string such as "2010" or "7" coerces the
# numbers to text and compares them lexicographically (e.g. 10 >= "7" is
# FALSE), which only works by accident for same-width values.
# Each step filters the previous result, which selects the same rows as
# repeating every condition against `mov`.

# 1. Released in 2010 or later.
YMov <- filter(mov, year >= 2010)
head(YMov[order(YMov$score, decreasing = TRUE), ], 10)

# 2. ... and produced in the USA or the UK.
YCMov <- filter(YMov, country %in% c("USA", "UK"))
summary(YCMov)
str(YCMov)
head(YCMov[order(YCMov$score, decreasing = TRUE), ], 10)

# 3. ... and with a score of at least 7.
YCSMov <- filter(YCMov, score >= 7)
head(YCSMov[order(YCSMov$score, decreasing = TRUE), ], 10)

# 4. ... with a score of at least 8 and a gross no smaller than the budget.
YCSGMov <- filter(YCSMov, score >= 8 & gross >= budget)
head(YCSGMov[order(YCSGMov$score, decreasing = TRUE), ], 10)

# Pivot table: count of those movies by company (rows) and score (columns).
qhpvt(YCSGMov, "company", "score", "n()")
# Install ggrepel only when it is missing, instead of reinstalling it on
# every run of the script.
if (!requireNamespace("ggrepel", quietly = TRUE)) {
  install.packages("ggrepel")
}
library("ggrepel")

# Number of qualifying movies (YCSGMov) per production company, as bars with
# vertical x-axis labels.
YCSGCMov <- YCSGMov %>%
  group_by(company) %>%
  summarize(total = n())
ggplot(YCSGCMov) +
  geom_bar(stat = "identity", color = "steelblue", aes(x = company, y = total)) +
  theme(axis.text.x = element_text(size = rel(1), angle = 90))

# Number of qualifying movies per writer, as points joined by a single line
# (group = 1 puts all writers in one group so the line can connect them).
YCSGWMov <- YCSGMov %>%
  group_by(writer) %>%
  summarize(total = n())
ggplot(YCSGWMov, aes(x = writer, y = total, group = 1)) +
  geom_point(colour = "steelblue") +
  geom_line() +
  theme(axis.text.x = element_text(size = rel(1), angle = 90))

# Number of qualifying movies per star: pivot table by star and score, then a
# step/count chart with the totals on the x axis.
YCSGSMov <- YCSGMov %>%
  group_by(star) %>%
  summarize(total = n())
qhpvt(YCSGMov, "star", "score", "n()")
ggplot(YCSGSMov, aes(x = total, y = star, group = 1)) +
  geom_step(colour = "orange") +
  geom_count(colour = "pink") +
  theme(axis.text.x = element_text(size = rel(1), angle = 90))
# Name, score, star and year of every movie starring Leonardo DiCaprio:
# top ten by score, then a pivot table counting movies by year and score.
select_Leona <- subset(
  mov,
  subset = star == "Leonardo DiCaprio",
  select = c(name, score, star, year)
)
head(select_Leona[order(select_Leona$score, decreasing = TRUE), ], n = 10)
qhpvt(select_Leona, rows = "year", columns = "score", calculations = "n()")

# The same two views for Matthew McConaughey.
select_Matthew <- subset(
  mov,
  subset = star == "Matthew McConaughey",
  select = c(name, score, star, year)
)
head(select_Matthew[order(select_Matthew$score, decreasing = TRUE), ], n = 10)
qhpvt(select_Matthew, rows = "year", columns = "score", calculations = "n()")

# Overlaid, semi-transparent histograms of the two actors' scores.
plot_ly(alpha = 0.6) %>%
  add_histogram(x = ~select_Matthew$score, name = "Matthew") %>%
  add_histogram(x = ~select_Leona$score, name = "Leona") %>%
  layout(barmode = "overlay")